# Load nnet, installing it first when it is not yet available
if (!require(nnet)) install.packages("nnet")
library(nnet)
library(caret)
library(pROC)
library(MatchIt)
# dplyr supplies %>%, mutate(), filter(), ntile(), group_by() and summarise();
# ggplot2 supplies the layers used for the calibration plot.
library(dplyr)
library(ggplot2)

# Switch to the project directory and read the modelling data set
setwd("${path}")
mydata <- read.csv(file = "data.csv")
# Recode ${hotspot} into a two-level factor labelled "event_0" / "event_1"
mydata$${hotspot} <- factor(
  mydata$${hotspot},
  levels = c("0", "1"),
  labels = c("event_0", "event_1")
)


# 1:3 nearest-neighbour matching with matchit()
m.out <- matchit(
  formula = ${hotspot} ~ ${independent_and},
  data = mydata,
  method = "nearest",
  ratio = 3
)

# Extract the matched sample as a data frame
matched_data <- match.data(object = m.out)

# Train the neural network on the matched sample.
# nnet() draws its starting weights at random, so the seed is fixed first to
# make the fitted model (and every file derived from it) reproducible.
set.seed(123)
model_nn <- nnet(
  ${hotspot} ~ ${independent_and},
  data = matched_data,
  size = 5,      # number of hidden units
  decay = 0.1,   # weight-decay regularisation
  maxit = 1000   # maximum number of iterations
)

# Predict on the matched (modelling) sample and evaluate the fit.
# predict(type = "raw") returns a one-column matrix. predictions_nn keeps that
# shape (it is indexed as a matrix further down), while predicted_probs_nn is
# flattened to a plain numeric vector so it becomes an ordinary data-frame
# column instead of an embedded matrix.
predictions_nn <- predict(model_nn, newdata = matched_data, type = "raw")
predicted_probs_nn <- as.numeric(predictions_nn)
predicted_classes_nn <- factor(
  ifelse(predicted_probs_nn >= 0.5, "event_1", "event_0"),
  levels = c("event_0", "event_1")
)

# Attach the predictions to the matched data
mydata_with_predictions_nn <- matched_data %>%
  mutate(
    ${hotspot}_probability = predicted_probs_nn,
    ${hotspot}_predicted_class = predicted_classes_nn,
    ${hotspot}_score = NA  # the neural network has no scorecard
  )

# Count the rows whose observed ${hotspot} equals the predicted class
matching_rows <- mydata_with_predictions_nn %>%
  filter(${hotspot} == ${hotspot}_predicted_class) %>%
  nrow()
cat("Number of rows where ${hotspot} and ${hotspot}_predicted_class match:", matching_rows, "\n")

# Accuracy = matching rows / total rows
total_rows <- nrow(matched_data)
accuracy <- matching_rows / total_rows
# The label now names the quantity that is actually printed (a proportion,
# not a row count).
cat("Accuracy for ${hotspot} (matching rows / total rows):", accuracy, "\n")

# Results for the modelling set
write.csv(mydata_with_predictions_nn, file = "mydata_with_predictions_nn.csv", row.names = FALSE)

#### Confusion matrix ####
# "event_1" is declared as the positive class explicitly; by default caret
# takes the first factor level ("event_0"), which would report sensitivity,
# specificity and predictive values for the non-event.
conf_matrix <- confusionMatrix(
  data = predicted_classes_nn,
  reference = matched_data$${hotspot},
  positive = "event_1"
)

# capture.output() writes the printed report to the file and restores normal
# console output itself, even if print() fails part-way through.
capture.output(print(conf_matrix), file = "confusionMatrix_nn.txt")
#### ROC curve ####
# width/height are pixel counts, so the unit has to be "px"; with "in" the
# device was asked for an 800 x 800 inch canvas at 150 dpi.
Cairo::CairoTIFF(file = "roc_nn.tiff", width = 800, height = 800, units = "px", dpi = 150)
# levels/direction are pinned so that "event_1" is always the case group and
# higher predicted probability always means "more likely a case"; otherwise
# roc() picks the direction automatically.
# tryCatch(finally = ) closes the TIFF device even when roc() raises an error.
roc_curve <- tryCatch(
  roc(
    matched_data$${hotspot},
    predictions_nn[, 1],
    levels = c("event_0", "event_1"),
    direction = "<",
    plot = TRUE,
    print.auc = TRUE,
    col = "darkgreen",
    lwd = 2,
    main = "ROC Curve"
  ),
  finally = dev.off()
)
#### Calibration plot ####
# Relate the observed event rate to the predicted probability
calib_data <- mydata_with_predictions_nn %>%
  mutate(bin = ntile(${hotspot}_probability, 10)) %>% # split predictions into 10 bins
  group_by(bin) %>%
  summarise(
    mean_pred = mean(${hotspot}_probability, na.rm = TRUE), # mean predicted probability per bin
    actual_rate = mean(as.numeric(${hotspot}) - 1, na.rm = TRUE),    # observed event rate per bin
    .groups = 'drop'
  )

# Build the plot object first so it can be printed explicitly below
calib_plot <- ggplot(calib_data, aes(x=mean_pred, y=actual_rate)) +
  geom_point(size=3) + # one point per bin
  geom_line(linetype="dashed", size=1) + # connect the bins
  geom_abline(intercept=0, slope=1, linetype="solid", color="red") + # ideal calibration line
  labs(title="Calibration Plot for Neural Network Model",
       x="Predicted Probability",
       y="Actual Rate") +
  theme_minimal()

# width/height are pixel counts, so the unit has to be "px"; with "in" the
# device was asked for an 800 x 800 inch canvas at 150 dpi.
Cairo::CairoTIFF(file = "cal_nn.tiff", width = 800, height = 800, units = "px", dpi = 150)
# A ggplot object is only drawn when it is printed; the explicit print() makes
# the figure appear in the file even when this script is run through source().
# tryCatch(finally = ) closes the device even if drawing fails, and invisible()
# prevents a second auto-print after the device has been closed.
invisible(tryCatch(print(calib_plot), finally = dev.off()))
# Score the outcome data set with the fitted network
mydata1 <- read.csv("result.csv")
# predict(type = "raw") returns a one-column matrix; predicted_probs_nn is
# flattened to a plain numeric vector so it becomes an ordinary data-frame
# column instead of an embedded matrix.
predictions_nn <- predict(model_nn, newdata = mydata1, type = "raw")
predicted_probs_nn <- as.numeric(predictions_nn)
predicted_classes_nn <- factor(
  ifelse(predicted_probs_nn >= 0.5, "event_1", "event_0"),
  levels = c("event_0", "event_1")
)

# Attach the predictions to the outcome data
result_with_predictions_nn <- mydata1 %>%
  mutate(
    ${hotspot}_probability = predicted_probs_nn,
    ${hotspot}_predicted_class = predicted_classes_nn,
    ${hotspot}_score = NA  # the neural network has no scorecard
  )
# Results for the outcome set
write.csv(result_with_predictions_nn, file = "result_with_predictions_nn.csv", row.names = FALSE)
